package com.rsscollector.feedrss.parser.parserimpl;

import com.rsscollector.feedrss.parser.GenericParser;
import com.rsscollector.feedrss.FeedReader;
import com.rsscollector.util.StringUtil;
import com.rsscollector.util.IO;
import com.sun.syndication.feed.synd.SyndEntry;

import java.net.URL;
import java.net.MalformedURLException;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.log4j.Logger;

/**
 * Parser for feed entries and article pages whose relative image links are
 * resolved against {@link #IMAGE_BASE_URL}.
 *
 * <p>{@link #getImageLink} extracts the thumbnail {@code <img>} tag that belongs
 * to a feed entry from a block of HTML, and {@link #getContent} downloads an
 * article page and returns its content {@code <div>}. In both cases relative
 * image sources are turned into absolute ones by prepending the base URL.
 */
public class TuoiTreParser extends GenericParser {

    private static final Logger logger = Logger.getLogger(TuoiTreParser.class.getName());

    /** Closing tag that terminates the anchor wrapping an entry's thumbnail. */
    public static final String END_TEXT = "</a>";

    /** Prefix prepended to relative image sources. */
    private static final String IMAGE_BASE_URL = "http://www.tuoitre.com.vn/Tianyon/";

    /** Opening tag of the article content container searched for by {@link #getContent}. */
    private static final String CONTENT_DIV_OPEN =
            "<div id=\"divContent\" style=\"color:#000000; font-weight:bold; font-family:Arial\">";

    private static final String CONTENT_DIV_CLOSE = "</div>";
    private static final String IMG_TAG_LOWER = "<img";
    private static final String IMG_TAG_UPPER = "<IMG";
    private static final String SRC_ATTR = "src=";
    private static final String SRC_ATTR_QUOTED = "src=\"";

    /**
     * Finds the {@code <img>} tag that follows the link of the given entry inside
     * {@code content} and returns it with its {@code src} made absolute.
     *
     * <p>The entry link's query string followed by {@code '><img} is used as the
     * search marker; if the raw marker is not found, its HTML-unescaped form is
     * tried. The tag is taken from the text between the marker and the next
     * {@link #END_TEXT}.
     *
     * @param content  HTML to search; may be {@code null}
     * @param key      unused by this implementation
     * @param matchStr feed entry whose link identifies the image; may be {@code null}
     * @return the rewritten {@code <img ...>} tag, or {@code null} when the content
     *         or entry is missing, the entry link cannot be parsed, or no matching
     *         image tag is found
     */
    public synchronized String getImageLink(String content, String key, SyndEntry matchStr) {
        if (content == null || matchStr == null) {
            return null;
        }

        String query;
        try {
            query = new URL(matchStr.getLink()).getQuery(); //ArticleID=214229&amp;ChannelID=14
        } catch (MalformedURLException e) {
            logger.error("Cannot parse entry link: " + matchStr.getLink(), e);
            return null;
        }
        if (query == null) {
            // A link without a query string cannot identify an image.
            return null;
        }

        // NOTE(review): only the single-quoted form is searched for; markup such as
        // ChannelID=3"><img (double quote) would not match - confirm whether it should.
        String marker = query + "'><img";  //ArticleID=214229&amp;ChannelID=14'><img
        int anchor = content.indexOf(marker);
        if (anchor < 0) {
            marker = StringEscapeUtils.unescapeHtml(marker);
            anchor = content.indexOf(marker);
        }

        while (anchor >= 0) {
            int linkEnd = content.indexOf(END_TEXT, anchor);
            if (linkEnd < 0) {
                // No closing anchor after this match, so later matches cannot have one either.
                break;
            }
            //ArticleID=181935&ChannelID=16'><img src='ImageView.aspx?ArticleID=181935' border=0>
            //ArticleID=214949&amp;ChannelID=3"><img src="ImageView.aspx?ImageID=210259" border="0" width="150">
            String imgTag = extractImgTag(content.substring(anchor, linkEnd));
            if (imgTag != null) {
                return prependImageBase(imgTag);
            }
            anchor = content.indexOf(marker, anchor + 1);
        }
        return null;
    }

    /**
     * Downloads the page at {@code url} and returns its content {@code <div>},
     * with {@link #IMAGE_BASE_URL} inserted in front of every {@code src="} value
     * that follows an {@code <IMG} tag.
     *
     * @param url address of the article page
     * @return the content block (terminated with {@code </div>}), or {@code null}
     *         when the page is empty, the content block is not found, or reading
     *         the URL fails
     */
    public StringBuffer getContent(String url) {
        StringBuffer buf = null;
        try {
            String data = IO.getContentFromURL(url);
            if (!StringUtil.isEmpty(data)) {
                int startIndex = data.indexOf(CONTENT_DIV_OPEN);
                int endIndex = startIndex < 0 ? -1 : data.indexOf(CONTENT_DIV_CLOSE, startIndex);
                if (startIndex >= 0 && endIndex > startIndex) {
                    buf = new StringBuffer(data.substring(startIndex, endIndex));
                    buf.append(CONTENT_DIV_CLOSE);
                    // correct image link
                    int position = buf.indexOf(IMG_TAG_UPPER);
                    while (position >= 0) {
                        int srcPos = buf.indexOf(SRC_ATTR_QUOTED, position);
                        if (srcPos < 0) {
                            // No src attribute left to rewrite; stop instead of
                            // inserting at a bogus offset and looping forever.
                            break;
                        }
                        int insertAt = srcPos + SRC_ATTR_QUOTED.length();
                        buf.insert(insertAt, IMAGE_BASE_URL);
                        position = buf.indexOf(IMG_TAG_UPPER, insertAt);
                    }
                }
            }
        } catch (Exception e) {
            logger.error("Cannot get content at url:" + url, e);
            /*Don't do anything, because have problem with reading url, we will read this site later*/
        }
        return buf;
    }

    /** Returns the first complete {@code <img ...>} tag in {@code fragment}, or {@code null} if none. */
    private static String extractImgTag(String fragment) {
        int tagStart = fragment.indexOf(IMG_TAG_LOWER);
        if (tagStart < 0) {
            return null;
        }
        int tagEnd = fragment.indexOf(">", tagStart + 1);
        if (tagEnd < 0) {
            return null;
        }
        return fragment.substring(tagStart, tagEnd + 1);
    }

    /**
     * Inserts {@link #IMAGE_BASE_URL} at the start of the tag's {@code src} value,
     * after an opening quote if there is one. Tags without {@code src=} are returned unchanged.
     */
    private static String prependImageBase(String imgTag) {
        int srcPos = imgTag.indexOf(SRC_ATTR);
        if (srcPos < 0) {
            return imgTag;
        }
        int insertAt = srcPos + SRC_ATTR.length();
        if (insertAt < imgTag.length()) {
            char first = imgTag.charAt(insertAt);
            if (first == '\'' || first == '"') {
                insertAt++; // keep the opening quote in front of the absolute URL
            }
        }
        return imgTag.substring(0, insertAt) + IMAGE_BASE_URL + imgTag.substring(insertAt);
    }
}
